from datetime import timedelta
from utils.operators.cluster_for_spark_sql_operator import SparkSqlOperator
from jms.dwd.oms.dwd_yl_oms_oms_order_incre_dt import jms_dwd__dwd_yl_oms_oms_order_incre_dt
from jms.dim.dim_network_whole_massage import jms_dim__dim_network_whole_massage
from jms.dm.dm_outport_effect_summary_dt_08_30 import jms_dm__dm_outport_effect_summary_dt_08_30


# Spark SQL task that runs jms/dm/dm_order_taking_delivery_summary_dt/execute.hql
# on YARN (queue 'pro') with dynamic executor allocation enabled.
jms_dm__dm_order_taking_delivery_summary_dt = SparkSqlOperator(
    task_id='jms_dm__dm_order_taking_delivery_summary_dt',
    task_concurrency=1,
    pool_slots=3,
    master='yarn',
    execution_timeout=timedelta(minutes=50),
    email=['matthew.xiong@jtexpress.com', 'yl_bigdata@yl-scm.com'],
    name='jms_dm__dm_order_taking_delivery_summary_dt_{{ execution_date | date_add(1) | cst_ds }}',
    sql='jms/dm/dm_order_taking_delivery_summary_dt/execute.hql',
    driver_memory='4G',
    executor_cores=4,
    executor_memory='12G',
    # Original author's note: when spark.dynamicAllocation.enabled is true,
    # num_executors is the minimum number of executors.
    # NOTE(review): this depends on how SparkSqlOperator maps num_executors
    # onto Spark settings -- confirm against the operator implementation.
    num_executors=12,
    conf={
        'spark.dynamicAllocation.enabled': 'true',  # turn on dynamic resource allocation
        'spark.shuffle.service.enabled': 'true',  # external shuffle service, enabled alongside dynamic allocation
        'spark.dynamicAllocation.maxExecutors': 24,  # upper bound on executors when scaling out
        # Idle timeout (s) after which an executor holding cached data is released.
        'spark.dynamicAllocation.cachedExecutorIdleTimeout': 180,
        'spark.sql.sources.partitionOverwriteMode': 'dynamic',  # overwrite only the partitions being written
        'spark.executor.memoryOverhead': '2G',  # off-heap memory per executor
        'spark.sql.shuffle.partitions': 400,
        # Fixed key: it was misspelled 'spark.default.paralleism', which Spark
        # does not recognise, so the value was never applied.
        'spark.default.parallelism': 1200,
        # Fixed value: the documented settings are 'strict' / 'nonstrict'; the
        # previous 'true' is not one of them. 'nonstrict' matches the
        # commented-out hiveconf below.
        'spark.hadoop.hive.exec.dynamic.partition.mode': 'nonstrict',
        'spark.network.timeout': 900,
        'spark.core.connection.ack.wait.timeout': 300,
        'spark.sql.autoBroadcastJoinThreshold': 104857600,  # 100 MiB
    },
    # Disabled Hive-side settings, kept for reference:
    # hiveconf={'hive.exec.dynamic.partition': 'true',  # dynamic partitioning
    #           'hive.exec.dynamic.partition.mode': 'nonstrict',
    #           'hive.exec.max.dynamic.partitions': 20,  # 20 partitions generated per day
    #           'hive.exec.max.dynamic.partitions.pernode': 20,  # 20 partitions generated per day
    #           'hive.merge.mapredfiles':'true',
    #           'hive.merge.mapfiles':'true',
    #           'hive.merge.size.per.task': 128000000,
    #           "hive.merge.smallfiles.avgsize":128000000,
    #           },
    yarn_queue='pro',
)

# Declare upstream dependencies: the summary task is wired downstream of the
# three tasks listed here.
jms_dm__dm_order_taking_delivery_summary_dt.set_upstream([
    jms_dwd__dwd_yl_oms_oms_order_incre_dt,
    jms_dim__dim_network_whole_massage,
    jms_dm__dm_outport_effect_summary_dt_08_30,
])